In this notebook, we are going to compute specific chromatin accesibility patterns for each of the cell types defined at level 1. To do this, we will use an differential accessible test (DA) followed by obtaining the chromatin signature per each group of cells. We used Signac and ChromVar tools to achieve the above goals.
library(Seurat)
library(Signac)
library(reshape)
library(ggplot2)
library(JASPAR2020)
library(TFBSTools)
library(BSgenome.Hsapiens.UCSC.hg38)
library(patchwork)
library(chromVAR)
library(motifmatchr)
library(ggpubr)
library(dplyr)
library(tidyr)
set.seed(1234)
path_to_ATAC <- here::here("scATAC-seq/results/R_objects/8.2.tonsil_peakcalling_annotation_level1_integrated.rds")
path_to_save <- here::here("scATAC-seq/results/R_objects/8.3.tonsil_peakcalling_annotation_level1_signature.rds")
path_to_save_df <- here::here("scATAC-seq/results/files/2.da_peaks_level1.tsv")
seurat <- readRDS(path_to_ATAC)
seurat
## An object of class Seurat
## 270784 features across 101076 samples within 1 assay
## Active assay: peaks_macs (270784 features, 270784 variable features)
## 3 dimensional reductions calculated: umap, lsi, harmony
table(seurat$annotation_level_1)
##
## NBC_MBC GCBC PC CD4_T Cytotoxic myeloid FDC PDC epithelial
## 41085 27497 2548 25011 3635 876 113 310 1
# Remove epithelial cell
toRemove = row.names(seurat@meta.data[which(seurat@meta.data$annotation_level_1 == "epithelial"),])
seurat <- seurat[,!colnames(seurat) %in% toRemove]
table(seurat$annotation_level_1)
##
## NBC_MBC GCBC PC CD4_T Cytotoxic myeloid FDC PDC epithelial
## 41085 27497 2548 25011 3635 876 113 310 0
DimPlot(
seurat, reduction = "umap",
group.by = "annotation_level_1",
cols = c("#a6cee3", "#1f78b4","#b2df8a",
"#33a02c", "#fb9a99","#e31a1c",
"#fdbf6f", "#ff7f00","#cab2d6",
"#6a3d9a"),
pt.size = 0.1
)
CoveragePlot(
object = seurat,
region = "chr19-42282829-42284493",
extend.upstream = 1000,
extend.downstream = 1000
)
To do that, we are going to compute all differentially accessible peaks between all the clusters defined at level1. We used a logistic regression, as suggested by Ntranos et.al 2018, adding the total number of fragments per peak as a latent variable to mitigate the effect of sequencing depth.
#differential.peaks <- FindAllMarkers(
# object = seurat,
# test.use = 'LR',
# min.pct = 0.2,
# only.pos = TRUE,
# latent.vars = 'nCount_peaks_macs'
#)
#write.table(differential.peaks, path_to_save_df, quote = F)
differential.peaks <- read.table(path_to_save_df)
Initially, we decided to filter out DA peaks that present an avg_log2FC less than 0.5. Then, the top 2000 DA peaks per group, defined at level 1, were considered the features to compute chromVAR deviations and thus add the chromatin signature in each cell-type.
chromatin_module <- function(seurat, top_peaks){
DA_filt <- differential.peaks[differential.peaks$avg_log2FC > 0.5, ]
DA_filt_2000 <- DA_filt %>% group_by(cluster) %>% top_n(n = top_peaks) #, wt = avg_log2FC)
features <- as.list(spread(DA_filt_2000, cluster, gene)[6:13])
features_na_omit <- lapply(features, function(x) x[!is.na(x)])
seurat <- AddChromatinModule(seurat,
features = features_na_omit,
genome = BSgenome.Hsapiens.UCSC.hg38)
return(seurat)}
seurat <- chromatin_module(seurat, 2000)
# Replace NA values per 0 to avoid problems in the representation
seurat@meta.data[c("NBC.MBC", "GCBC", "PC", "CD4.T","Cytotoxic",
"myeloid", "FDC", "PDC")][is.na(seurat@meta.data[c("NBC.MBC","GCBC", "PC","CD4.T",
"Cytotoxic", "myeloid", "FDC", "PDC")])] <- 0
To visualize this signature, we plotted the scores using FeaturePlot, with a maximum cutoff of ‘q95’.
FeaturePlot(seurat, "NBC.MBC", max.cutoff = "q95") +
scale_color_viridis_c(option = "magma")
FeaturePlot(seurat, "GCBC", max.cutoff = "q95") +
scale_color_viridis_c(option = "magma")
FeaturePlot(seurat, "PC", max.cutoff = "q95") +
scale_color_viridis_c(option = "magma")
FeaturePlot(seurat, "CD4.T", max.cutoff = "q95") +
scale_color_viridis_c(option = "magma")
FeaturePlot(seurat, "Cytotoxic", max.cutoff = "q95") +
scale_color_viridis_c(option = "magma")
FeaturePlot(seurat, c("myeloid"), max.cutoff = "q95") +
scale_color_viridis_c(option = "magma")
FeaturePlot(seurat, c("FDC"), max.cutoff = "q95") +
scale_color_viridis_c(option = "magma")
FeaturePlot(seurat, c("PDC"), max.cutoff = "q95") +
scale_color_viridis_c(option = "magma")
saveRDS(seurat,path_to_save)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Tonsil_atlas/lib/libopenblasp-r0.3.10.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] tidyr_1.1.2 dplyr_1.0.2 ggpubr_0.4.0 motifmatchr_1.10.0 chromVAR_1.10.0 patchwork_1.1.0 BSgenome.Hsapiens.UCSC.hg38_1.4.3 BSgenome_1.56.0 rtracklayer_1.48.0 Biostrings_2.56.0 XVector_0.28.0 GenomicRanges_1.40.0 GenomeInfoDb_1.24.0 IRanges_2.22.1 S4Vectors_0.26.0 BiocGenerics_0.34.0 TFBSTools_1.26.0 JASPAR2020_0.99.10 ggplot2_3.3.2 reshape_0.8.8 Signac_1.1.0.9000 Seurat_3.9.9.9010 BiocStyle_2.16.1
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.1 SnowballC_0.7.0 GGally_2.0.0 R.methodsS3_1.8.1 nabor_0.5.0 bit64_4.0.5 knitr_1.30 irlba_2.3.3 DelayedArray_0.14.0 R.utils_2.10.1 data.table_1.13.2 rpart_4.1-15 KEGGREST_1.28.0 RCurl_1.98-1.2 AnnotationFilter_1.12.0 generics_0.1.0 GenomicFeatures_1.40.1 cowplot_1.1.0 RSQLite_2.2.1 RANN_2.6.1 future_1.20.1 bit_4.0.4 spatstat.data_2.1-0 xml2_1.3.2 httpuv_1.5.4 SummarizedExperiment_1.18.1 assertthat_0.2.1 DirichletMultinomial_1.30.0 xfun_0.18 hms_0.5.3 evaluate_0.14 promises_1.1.1 progress_1.2.2 readxl_1.3.1 caTools_1.18.0 dbplyr_1.4.4 igraph_1.2.6 DBI_1.1.0 htmlwidgets_1.5.2 purrr_0.3.4 ellipsis_0.3.1 backports_1.2.0
## [43] bookdown_0.21 annotate_1.66.0 biomaRt_2.44.4 deldir_0.2-3 vctrs_0.3.4 Biobase_2.48.0 here_1.0.1 ensembldb_2.12.1 ROCR_1.0-11 abind_1.4-5 withr_2.3.0 ggforce_0.3.2 checkmate_2.0.0 sctransform_0.3.1 GenomicAlignments_1.24.0 prettyunits_1.1.1 goftest_1.2-2 cluster_2.1.0 lazyeval_0.2.2 seqLogo_1.54.3 crayon_1.3.4 labeling_0.4.2 pkgconfig_2.0.3 tweenr_1.0.1 nlme_3.1-150 ProtGenerics_1.20.0 nnet_7.3-14 rlang_0.4.11 globals_0.13.1 lifecycle_0.2.0 miniUI_0.1.1.1 BiocFileCache_1.12.1 rsvd_1.0.3 dichromat_2.0-0 rprojroot_2.0.2 cellranger_1.1.0 polyclip_1.10-0 matrixStats_0.57.0 lmtest_0.9-38 graph_1.66.0 Matrix_1.3-4 ggseqlogo_0.1
## [85] carData_3.0-4 zoo_1.8-8 base64enc_0.1-3 ggridges_0.5.2 png_0.1-7 viridisLite_0.3.0 bitops_1.0-6 R.oo_1.24.0 KernSmooth_2.23-17 blob_1.2.1 stringr_1.4.0 parallelly_1.21.0 rstatix_0.6.0 readr_1.4.0 jpeg_0.1-8.1 ggsignif_0.6.0 CNEr_1.24.0 scales_1.1.1 memoise_1.1.0 magrittr_1.5 plyr_1.8.6 ica_1.0-2 zlibbioc_1.34.0 compiler_4.0.3 RColorBrewer_1.1-2 fitdistrplus_1.1-1 Rsamtools_2.4.0 listenv_0.8.0 pbapply_1.4-3 htmlTable_2.1.0 Formula_1.2-4 MASS_7.3-53 mgcv_1.8-33 tidyselect_1.1.0 forcats_0.5.0 stringi_1.5.3 yaml_2.2.1 askpass_1.1 latticeExtra_0.6-29 ggrepel_0.8.2 grid_4.0.3 VariantAnnotation_1.34.0
## [127] fastmatch_1.1-0 tools_4.0.3 rio_0.5.16 future.apply_1.6.0 rstudioapi_0.12 TFMPvalue_0.0.8 foreign_0.8-80 lsa_0.73.2 gridExtra_2.3 farver_2.0.3 Rtsne_0.15 digest_0.6.27 BiocManager_1.30.10 shiny_1.5.0 pracma_2.2.9 Rcpp_1.0.5 car_3.0-10 broom_0.7.2 later_1.1.0.1 RcppAnnoy_0.0.16 OrganismDbi_1.30.0 httr_1.4.2 AnnotationDbi_1.50.3 ggbio_1.36.0 biovizBase_1.36.0 colorspace_2.0-0 XML_3.99-0.3 tensor_1.5 reticulate_1.18 splines_4.0.3 uwot_0.1.8.9001 RBGL_1.64.0 RcppRoll_0.3.0 spatstat.utils_2.1-0 plotly_4.9.2.1 xtable_1.8-4 jsonlite_1.7.1 poweRlaw_0.70.6 spatstat_1.64-1 R6_2.5.0 Hmisc_4.4-1 pillar_1.4.6
## [169] htmltools_0.5.1.1 mime_0.9 glue_1.4.2 fastmap_1.0.1 DT_0.16 BiocParallel_1.22.0 codetools_0.2-17 lattice_0.20-41 tibble_3.0.4 curl_4.3 leiden_0.3.5 gtools_3.8.2 zip_2.1.1 openxlsx_4.2.3 GO.db_3.11.4 openssl_1.4.3 survival_3.2-7 rmarkdown_2.5 munsell_0.5.0 GenomeInfoDbData_1.2.3 haven_2.3.1 reshape2_1.4.4 gtable_0.3.0